Usual Filtering
df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0') # it was 1.5 before
#select only few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
'base_SdssShape_flux']
df = df[cols_select]
# drop all nans
df = df.dropna()
# additional columns
df['radius'] = df.eval(""" ( (ext_shapeHSM_HsmSourceMoments_xx * ext_shapeHSM_HsmSourceMoments_yy) \
- (ext_shapeHSM_HsmSourceMoments_xy**2 ) )**0.25 """)
Shape filtering
https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb
# df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
# df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
I did not do these two filtering, they remove almost all objects.
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')
Filter strongly lensed objects
# exclude strong lens objects <=154 distance
# The shape of lsst.fits file is 3998,3998 and center is 1699,1699.
df['x_center'] = 1699
df['y_center'] = 1699
df['distance'] = ( (df['x[0]'] - df['x_center'])**2 + (df['x[1]'] - df['y_center'])**2 )**0.5
df = df[df.distance > 154]
Imcat script
# create new columns and cleaning (four files)
lc -C -n fN -n id -N '1 2 x' -N '1 2 errx' -N '1 2 g' -n ellip -n flux -n radius < "${M9T}".txt | lc +all 'mag = %flux log10 -2.5 *' | cleancat 10 | lc +all -r 'mag' > "${M9C}".cat
# merge 4 catalogs
mergecats 5 "${MC}".cat "${M9C}".cat "${LC}".cat "${L9C}".cat > ${catalogs}/merge.cat &&
lc -b +all
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector'
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector'
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector'
< ${catalogs}/merge.cat > ${final}/final_${i}.cat
Notes
final_text.txt is created by imcat program after merging four lsst files (m,m9,l,l9) after cleaning.
import json, os,sys
import numpy as np
import pandas as pd
import time
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import ipywidgets
# settings
time_start_notebook = time.time()
pd.set_option('display.max_columns',200)
sns.set(color_codes=True)
plt.style.use('ggplot')
%matplotlib inline
print([(x.__name__, x.__version__) for x in [np,pd,sns,plotly,ipywidgets]])
[('numpy', '1.19.4'), ('pandas', '1.1.4'), ('seaborn', '0.11.0'), ('plotly', '4.14.1'), ('ipywidgets', '7.5.1')]
g_sq = g00 g00 + g10 g10
gmd_sq = gmd0**2 + gmd1**2
!ls ../../data/imcat_cleaned
final_ngals10k_z1.5_nstars300_e3_e7_dmsnoise50_cleancat10_mergecat5_files_0_999.txt
ifile = "../../data/imcat_cleaned/final_ngals10k_z1.5_nstars300_e3_e7_dmsnoise50_cleancat10_mergecat5_files_0_999.txt"
!head -2 $ifile
# fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1]
0 0 0 0 2135 2096 2067 2082 2580.9311 2849.5868 0.0297 0.0319 0.0137 0.0134 0.0299 0.0319 0.0303 0.0306 -0.0952 -0.2551 0.1531 0.1393 -0.0939 -0.2453 0.1209 0.1211 0.27228487 0.20698816 0.26265814 0.1711199 408036.55 406763.65 409150.48 407928.36 4.2354349 4.2063519 4.2733181 4.2441436 -14.026748 -14.023355 -14.029708 -14.02646 0.02895 -0.0579 0.0135 -0.0621 -0.12415 -0.1972 -0.1074 -0.1832
names = "fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1]"
print(names)
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1]
names = ['fN[0][0]','fN[1][0]','fN[2][0]','fN[3][0]',
'id[0][0]','id[1][0]','id[2][0]','id[3][0]',
'x[0]','x[1]',
'errx[0][0]','errx[0][1]','errx[1][0]','errx[1][1]','errx[2][0]',
'errx[2][1]','errx[3][0]','errx[3][1]',
'g[0][0]','g[0][1]','g[1][0]','g[1][1]','g[2][0]','g[2][1]','g[3][0]','g[3][1]',
'ellip[0][0]','ellip[1][0]','ellip[2][0]','ellip[3][0]',
'flux[0][0]','flux[1][0]','flux[2][0]','flux[3][0]',
'radius[0][0]','radius[1][0]','radius[2][0]','radius[3][0]',
'mag[0][0]','mag[1][0]','mag[2][0]','mag[3][0]',
'gm[0]','gm[1]','gc[0]', 'gc[1]',
'gmd[0]','gmd[1]','gcd[0]','gcd[1]']
df = pd.read_csv(ifile,comment='#',engine='python',sep=r'\s\s+',
header=None,names=names)
print(df.shape)
# new columns
# df['g_sq'] = df['g[0][0]'] **2 + df['g[1][0]']**2 # only for imcat 00 and 10
# df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2
df['g_sq'] = df['g[0][0]'] **2 + df['g[0][1]']**2
df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2
df['gm_sq'] = df['gm[0]']**2 + df['gm[1]']**2
df['gc_sq'] = df['gc[0]']**2 + df['gc[1]']**2
df['mag_mono'] = (df['mag[0][0]'] + df['mag[1][0]'] ) / 2
df['mag_chro'] = (df['mag[2][0]'] + df['mag[3][0]'] ) / 2
df.head()
(3255593, 50)
| fN[0][0] | fN[1][0] | fN[2][0] | fN[3][0] | id[0][0] | id[1][0] | id[2][0] | id[3][0] | x[0] | x[1] | errx[0][0] | errx[0][1] | errx[1][0] | errx[1][1] | errx[2][0] | errx[2][1] | errx[3][0] | errx[3][1] | g[0][0] | g[0][1] | g[1][0] | g[1][1] | g[2][0] | g[2][1] | g[3][0] | g[3][1] | ellip[0][0] | ellip[1][0] | ellip[2][0] | ellip[3][0] | flux[0][0] | flux[1][0] | flux[2][0] | flux[3][0] | radius[0][0] | radius[1][0] | radius[2][0] | radius[3][0] | mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | gm[0] | gm[1] | gc[0] | gc[1] | gmd[0] | gmd[1] | gcd[0] | gcd[1] | g_sq | gmd_sq | gm_sq | gc_sq | mag_mono | mag_chro | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 2135 | 2096 | 2067 | 2082 | 2580.93110 | 2849.5868 | 0.0297 | 0.0319 | 0.0137 | 0.0134 | 0.0299 | 0.0319 | 0.0303 | 0.0306 | -0.0952 | -0.2551 | 0.1531 | 0.1393 | -0.0939 | -0.2453 | 0.1209 | 0.1211 | 0.272285 | 0.206988 | 0.262658 | 0.171120 | 408036.5500 | 406763.6500 | 409150.4800 | 407928.3600 | 4.235435 | 4.206352 | 4.273318 | 4.244144 | -14.026748 | -14.023355 | -14.029708 | -14.026460 | 0.02895 | -0.05790 | 0.01350 | -0.06210 | -0.12415 | -0.19720 | -0.10740 | -0.18320 | 0.074139 | 0.054301 | 0.004191 | 0.004039 | -14.025052 | -14.028084 |
| 1 | 0 | 0 | 0 | 0 | 4794 | 4601 | 4395 | 4401 | 2003.47690 | 1691.9412 | 0.7515 | 0.7892 | 1.0306 | 1.5368 | 0.7687 | 0.7990 | 1.0311 | 1.5089 | 0.0423 | 0.1264 | -0.9250 | -0.0098 | 0.0498 | 0.1351 | -0.7656 | 0.0058 | 0.133290 | 0.925052 | 0.143986 | 0.765622 | 5129.8816 | 5145.7756 | 5075.5358 | 5121.1177 | 3.591389 | 3.740641 | 3.593138 | 3.757837 | -9.275268 | -9.278627 | -9.263705 | -9.273412 | -0.44135 | 0.05830 | -0.35790 | 0.07045 | 0.48365 | 0.06810 | 0.40770 | 0.06465 | 0.017766 | 0.238555 | 0.198189 | 0.133056 | -9.276948 | -9.268558 |
| 2 | 0 | 0 | 0 | 0 | 4146 | 3989 | 3883 | 3877 | 3319.95920 | 1157.8130 | 0.1221 | 0.1177 | 0.1237 | 0.1226 | 0.1227 | 0.1175 | 0.1268 | 0.1244 | 0.2984 | 0.7404 | 0.5328 | -1.0938 | 0.2935 | 0.7235 | 0.2716 | -0.6233 | 0.798270 | 1.216665 | 0.780765 | 0.679904 | 36835.1330 | 37383.7480 | 36986.8370 | 37253.1110 | 3.852369 | 3.893983 | 3.854637 | 3.895793 | -11.415656 | -11.431707 | -11.420118 | -11.427906 | 0.41560 | -0.17670 | 0.28255 | 0.05010 | -0.11720 | 0.91710 | 0.01095 | 0.67340 | 0.637235 | 0.854808 | 0.203946 | 0.082345 | -11.423682 | -11.424012 |
| 3 | 0 | 0 | 0 | 0 | 5262 | 5123 | 4921 | 4927 | 108.63672 | 2092.8406 | 0.3938 | 0.4327 | 0.4012 | 0.4276 | 0.3952 | 0.4332 | 0.4085 | 0.4268 | -0.2016 | 0.4174 | 0.7200 | -1.2491 | -0.2038 | 0.4063 | 0.0783 | -0.2840 | 0.463536 | 1.441753 | 0.454548 | 0.294596 | 8856.7761 | 8874.4106 | 8826.0475 | 8848.2706 | 3.494549 | 3.502561 | 3.488882 | 3.496959 | -9.868189 | -9.870349 | -9.864416 | -9.867146 | 0.25920 | -0.41585 | -0.06275 | 0.06115 | -0.46080 | 0.83325 | -0.14105 | 0.34515 | 0.214865 | 0.906642 | 0.240116 | 0.007677 | -9.869269 | -9.865781 |
| 4 | 0 | 0 | 0 | 0 | 4222 | 4082 | 3925 | 3921 | 1489.66340 | 1216.1759 | 0.4635 | 0.4180 | 0.4022 | 0.4707 | 0.4664 | 0.4179 | 0.4069 | 0.4755 | 0.8425 | 0.2209 | -1.0517 | -1.4290 | 0.8048 | 0.2047 | -0.5086 | -0.6103 | 0.870978 | 1.774292 | 0.830425 | 0.794443 | 8351.7254 | 8324.8939 | 8445.7476 | 8476.5524 | 3.480478 | 3.458537 | 3.505148 | 3.501773 | -9.804441 | -9.800947 | -9.816595 | -9.820548 | -0.10460 | -0.60405 | 0.14810 | -0.20280 | 0.94710 | 0.82495 | 0.65670 | 0.40750 | 0.758603 | 1.577541 | 0.375818 | 0.063061 | -9.802694 | -9.818572 |
!mkdir -p images
fig,ax = plt.subplots(1,2,figsize=(14,6))
df['gm_sq'].plot.hist(bins=50,ax=ax[0],color='b',label='mono')
ax[0].set_xlabel('gm_sq')
ax[0].legend()
ax[0].set_title('g-squared histogram for mono')
# gcsq
df['gc_sq'].plot.hist(bins=50,ax=ax[1],color='r',label='chro')
ax[1].set_xlabel('gc_sq')
ax[1].legend()
ax[1].set_title('g-squared histogram for chro')
# savefig
plt.savefig('images/gmsq_gcsq_hist.png')
plt.figure(figsize=(12,8))
sns.distplot(df['g_sq'],label='g_sq')
sns.distplot(df['gmd_sq'],label='gmd_sq')
plt.legend()
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning) /Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<matplotlib.legend.Legend at 0x7fa58a316090>
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
def matrix_of_number_density_from_two_cols(df,xcol,ycol,N):
"""Create grid of number density of two columns
- Find the absolute max from two columns.
- Create N bins -absMax to +absMax.
- Return a matrix of N*N shape.
"""
from itertools import product
# derived variables
xlabel = xcol
ylabel = ycol
xlabel1 = xlabel + '_cat_labels'
ylabel1 = ylabel + '_cat_labels'
xlabel2 = xlabel + '_cat'
ylabel2 = ylabel + '_cat'
colname = 'cat_freq'
# take only xlabel and ylabel columns
dx = df[[xlabel, ylabel]].copy()
# get absolute maximum from two columns
tolerance = 0.0000001
max_abs_xcol_ycol = df[[xcol,ycol]].describe().iloc[[3,-1],:].abs().max().max()
# create 1d array with N+1 values to create N bins
# bins = np.linspace(-max_abs_xcol_ycol-tolerance, max_abs_xcol_ycol+tolerance,N+1)
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
# create N bins
dx[xlabel1] = pd.cut(dx[xlabel], bins, labels=np.arange(N))
dx[ylabel1] = pd.cut(dx[ylabel], bins, labels=np.arange(N))
# count number of points in each bin
dx[colname] = dx.groupby([xlabel1,ylabel1])[xlabel1].transform('count')
# drop duplicates
dx1 = dx.drop_duplicates(subset=[xlabel1,ylabel1])[[xlabel1,ylabel1,colname]]
# use permutation to get the grid of N * N
perms = list(product(range(N), range(N)))
x = [i[0] for i in perms]
y = [i[1] for i in perms]
dx2 = pd.DataFrame({xlabel1: x, ylabel1: y, colname:0})
# update dx2 to merge frequency values
dx2.update(dx2.drop(colname,1).merge(dx1,how='left'))
dx2 = dx2.astype(int)
# z values to plot heatmap
z = dx2[colname].values.reshape(N,N)
z = z.T
return z
def transform_scale(z,transform='linear',scale=None):
"""Transform and scale given 1d numpy array.
transform: linear, log, sqrt, sinh, arcsinh
scale : minmax, zscale
"""
#==================================
# linear, log, sqrt, arcsinh, sinh
#
# we need linear tranform option to compare.
if transform == 'linear':
z = z
if transform == 'log':
z = np.log1p(z)
if transform == 'sqrt':
z = np.sqrt(z)
if transform == 'sinh':
z = np.sinh(z)
if transform == 'arcsinh':
z = np.arcsinh(z)
#===============================
# scaling minmax and zscale
if scale== 'minmax':
z = z / (z.max()-z.min())
if scale == 'zscale':
z = (z-z.mean()) / z.std()
if scale is None:
z = z
return z
def plot_contour(Z,colorscale,x1y1x2y2=None,
title='Contour plot',
xlabel='',
ylabel=''):
"""Plot the contour plot.
Contour plot from given 2d array.
Example:
-----------
z = matrix_of_number_density_from_two_cols(df,'gsq','gmdsq',N)
z1 = transform_scale(z,transform=transform,scale=scale)
plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
title=f'Contour plot: {transform}+{scale}',
xlabel='Bin number of gsq',
ylabel='Bin number of gmsq')
"""
trace1 = go.Contour(z=Z, colorscale=colorscale)
axis_layout = dict(
showticklabels = True
)
xaxis = {**axis_layout, **{'title':f'{xlabel}'}}
yaxis = {**axis_layout, **{'title':f'{ylabel}'}}
layout = go.Layout(
width=1000,
height=1000,
autosize=False,
title=f'{title} ',
xaxis = xaxis,
yaxis = yaxis,
)
data = [trace1]
if x1y1x2y2:
# center line
p2x1, p2y1 = 0,0
p2x2, p2y2 = 99,99
sc1 = go.Scatter(x=[p2x1,p2x2],
y=[p2y1,p2y2],
mode = 'lines+markers',
name = f'line ({p2x1},{p2y1}) to ({p2x2},{p2y2})')
sc2 = go.Scatter(x=[x1y1x2y2[0],x1y1x2y2[2]],
y=[x1y1x2y2[1],x1y1x2y2[3]],
mode = 'lines+markers',
name = f'line ({x1y1x2y2[0]},{x1y1x2y2[1]}) \
to ({x1y1x2y2[2]},{x1y1x2y2[3]})')
data = [trace1, sc1, sc2]
fig = go.Figure( data=data, layout=layout )
# update figure
fig.update_layout(
xaxis = dict(tickmode = 'linear',dtick = 5),
yaxis = dict(tickmode = 'linear',dtick = 5),
)
iplot(fig)
return None
N = 100
transform='log' # linear log sqrt sinh
scale='minmax'
xcol,ycol = 'g_sq', 'gmd_sq'
z = matrix_of_number_density_from_two_cols(df,xcol,ycol,N)
z1 = transform_scale(z,transform=transform,scale=scale)
plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
title=f'Contour plot for transform = {transform} and scaling = {scale}',
xlabel=f'Bin number of {xcol}',
ylabel=f'Bin number of {ycol}')
N = 100
xcol = 'g_sq'
ycol = 'gmd_sq'
max_abs_xcol_ycol = df[[xcol,ycol]].abs().max().max()
print(max_abs_xcol_ycol)
df[df[xcol]==max_abs_xcol_ycol]
3.99995773
| fN[0][0] | fN[1][0] | fN[2][0] | fN[3][0] | id[0][0] | id[1][0] | id[2][0] | id[3][0] | x[0] | x[1] | errx[0][0] | errx[0][1] | errx[1][0] | errx[1][1] | errx[2][0] | errx[2][1] | errx[3][0] | errx[3][1] | g[0][0] | g[0][1] | g[1][0] | g[1][1] | g[2][0] | g[2][1] | g[3][0] | g[3][1] | ellip[0][0] | ellip[1][0] | ellip[2][0] | ellip[3][0] | flux[0][0] | flux[1][0] | flux[2][0] | flux[3][0] | radius[0][0] | radius[1][0] | radius[2][0] | radius[3][0] | mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | gm[0] | gm[1] | gc[0] | gc[1] | gmd[0] | gmd[1] | gcd[0] | gcd[1] | g_sq | gmd_sq | gm_sq | gc_sq | mag_mono | mag_chro | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 518869 | 161 | 161 | 161 | 161 | 5944 | 6271 | 5703 | 5701 | 583.26403 | 2902.3778 | 1.2972 | 1.3448 | 1.2745 | 1.3142 | 1.2781 | 1.289 | 1.272 | 1.2884 | -0.7582 | -1.8507 | 0.278 | 1.5816 | 0.206 | 1.237 | 0.2168 | 1.3696 | 1.999989 | 1.605846 | 1.254035 | 1.386653 | 2649.7412 | 2740.375 | 3023.9938 | 3092.3356 | 3.454795 | 3.473957 | 3.678207 | 3.702682 | -8.558009 | -8.594525 | -8.701452 | -8.725717 | -0.2401 | -0.13455 | 0.2114 | 1.3033 | -0.5181 | -1.71615 | -0.0054 | -0.0663 | 3.999958 | 3.213598 | 0.075752 | 1.743281 | -8.576267 | -8.713584 |
# create 1d array with N+1 values to create N bins
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
# bins dict
bins_dict = {i:v for i,v in enumerate(bins)}
# create N bins
ser_ycol_bins = pd.cut(df[ycol], bins=bins,)
df['g_sq_bins'] = ser_ycol_bins
df[['g_sq','g_sq_bins']].head()
| g_sq | g_sq_bins | |
|---|---|---|
| 0 | 0.074139 | (0.04, 0.08] |
| 1 | 0.017766 | (0.2, 0.24] |
| 2 | 0.637235 | (0.84, 0.88] |
| 3 | 0.214865 | (0.88, 0.92] |
| 4 | 0.758603 | (1.56, 1.6] |
bins[10], bins[11]
(0.39999577300000005, 0.43999535030000003)
What is the corresponding gmsq value to y-axis bin number 10 (11th bin)?
The 100*100 bin is created from absMax of g_sq and gmd_sq. Then the line 0 to absMax is divided into 100 parts and bins are created.
bin 10 is used from above contour plot.
# take all points where gmd_sq > 10th bin
df_gmd_sq_lt_bin10 = df[df.gmd_sq > bins[10]]
print(df_gmd_sq_lt_bin10.shape)
df_gmd_sq_lt_bin10.head()
(1256982, 57)
| fN[0][0] | fN[1][0] | fN[2][0] | fN[3][0] | id[0][0] | id[1][0] | id[2][0] | id[3][0] | x[0] | x[1] | errx[0][0] | errx[0][1] | errx[1][0] | errx[1][1] | errx[2][0] | errx[2][1] | errx[3][0] | errx[3][1] | g[0][0] | g[0][1] | g[1][0] | g[1][1] | g[2][0] | g[2][1] | g[3][0] | g[3][1] | ellip[0][0] | ellip[1][0] | ellip[2][0] | ellip[3][0] | flux[0][0] | flux[1][0] | flux[2][0] | flux[3][0] | radius[0][0] | radius[1][0] | radius[2][0] | radius[3][0] | mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | gm[0] | gm[1] | gc[0] | gc[1] | gmd[0] | gmd[1] | gcd[0] | gcd[1] | g_sq | gmd_sq | gm_sq | gc_sq | mag_mono | mag_chro | g_sq_bins | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | 0 | 0 | 0 | 4146 | 3989 | 3883 | 3877 | 3319.95920 | 1157.81300 | 0.1221 | 0.1177 | 0.1237 | 0.1226 | 0.1227 | 0.1175 | 0.1268 | 0.1244 | 0.2984 | 0.7404 | 0.5328 | -1.0938 | 0.2935 | 0.7235 | 0.2716 | -0.6233 | 0.798270 | 1.216665 | 0.780765 | 0.679904 | 36835.1330 | 37383.7480 | 36986.8370 | 37253.1110 | 3.852369 | 3.893983 | 3.854637 | 3.895793 | -11.415656 | -11.431707 | -11.420118 | -11.427906 | 0.4156 | -0.17670 | 0.28255 | 0.05010 | -0.1172 | 0.91710 | 0.01095 | 0.67340 | 0.637235 | 0.854808 | 0.203946 | 0.082345 | -11.423682 | -11.424012 | (0.84, 0.88] |
| 3 | 0 | 0 | 0 | 0 | 5262 | 5123 | 4921 | 4927 | 108.63672 | 2092.84060 | 0.3938 | 0.4327 | 0.4012 | 0.4276 | 0.3952 | 0.4332 | 0.4085 | 0.4268 | -0.2016 | 0.4174 | 0.7200 | -1.2491 | -0.2038 | 0.4063 | 0.0783 | -0.2840 | 0.463536 | 1.441753 | 0.454548 | 0.294596 | 8856.7761 | 8874.4106 | 8826.0475 | 8848.2706 | 3.494549 | 3.502561 | 3.488882 | 3.496959 | -9.868189 | -9.870349 | -9.864416 | -9.867146 | 0.2592 | -0.41585 | -0.06275 | 0.06115 | -0.4608 | 0.83325 | -0.14105 | 0.34515 | 0.214865 | 0.906642 | 0.240116 | 0.007677 | -9.869269 | -9.865781 | (0.88, 0.92] |
| 4 | 0 | 0 | 0 | 0 | 4222 | 4082 | 3925 | 3921 | 1489.66340 | 1216.17590 | 0.4635 | 0.4180 | 0.4022 | 0.4707 | 0.4664 | 0.4179 | 0.4069 | 0.4755 | 0.8425 | 0.2209 | -1.0517 | -1.4290 | 0.8048 | 0.2047 | -0.5086 | -0.6103 | 0.870978 | 1.774292 | 0.830425 | 0.794443 | 8351.7254 | 8324.8939 | 8445.7476 | 8476.5524 | 3.480478 | 3.458537 | 3.505148 | 3.501773 | -9.804441 | -9.800947 | -9.816595 | -9.820548 | -0.1046 | -0.60405 | 0.14810 | -0.20280 | 0.9471 | 0.82495 | 0.65670 | 0.40750 | 0.758603 | 1.577541 | 0.375818 | 0.063061 | -9.802694 | -9.818572 | (1.56, 1.6] |
| 5 | 0 | 0 | 0 | 0 | 3704 | 3572 | 3487 | 3507 | 42.24975 | 905.77085 | 0.3016 | 0.3021 | 0.2832 | 0.3201 | 0.3023 | 0.3019 | 0.2865 | 0.3230 | 0.4066 | 0.2078 | -1.3324 | -0.4557 | 0.3359 | 0.2131 | -0.3613 | -0.0610 | 0.456623 | 1.408173 | 0.397794 | 0.366413 | 12258.0780 | 12242.1090 | 12202.5980 | 12274.5210 | 3.512466 | 3.510293 | 3.502338 | 3.517241 | -10.221056 | -10.219641 | -10.216131 | -10.222511 | -0.4629 | -0.12395 | -0.01270 | 0.07605 | 0.8695 | 0.33175 | 0.34860 | 0.13705 | 0.208504 | 0.866088 | 0.229640 | 0.005945 | -10.220349 | -10.219321 | (0.84, 0.88] |
| 13 | 0 | 0 | 0 | 0 | 2934 | 2858 | 2796 | 2812 | 1558.77430 | 265.04665 | 1.7807 | 1.1846 | 1.1449 | 1.7173 | 1.7871 | 1.1834 | 1.1315 | 1.6661 | 0.9273 | 0.0129 | -1.0095 | 0.1388 | 0.9121 | 0.0176 | -0.8063 | 0.1116 | 0.927390 | 1.018997 | 0.912270 | 0.813987 | 5062.6477 | 5027.7626 | 5048.8434 | 5014.3198 | 4.120589 | 3.976149 | 4.111542 | 3.966985 | -9.260944 | -9.253437 | -9.257980 | -9.250530 | -0.0411 | 0.07585 | 0.05290 | 0.06460 | 0.9684 | -0.06295 | 0.85920 | -0.04700 | 0.860052 | 0.941761 | 0.007442 | 0.006972 | -9.257191 | -9.254255 | (0.92, 0.96] |
# fN[0][0] ==> lsst_mono_z1.5_000.fits
# fN[1][0] ==> lsst_mono90_z1.5_000.fits
#
# id[0][0] ==> id of given object for mono file number 0
# take only first file number 0 (it has m,m9,c and c9)
df_gmd_sq_lt_bin10_file0 = df_gmd_sq_lt_bin10[df_gmd_sq_lt_bin10['fN[0][0]'] == 0]
# add radius for mono
df_gmd_sq_lt_bin10_file0['radius_mono'] = \
(df_gmd_sq_lt_bin10_file0['radius[0][0]'] +
df_gmd_sq_lt_bin10_file0['radius[1][0]'] ) /2.0
print(df_gmd_sq_lt_bin10_file0.shape)
df_gmd_sq_lt_bin10_file0.head()
(1011, 58)
| fN[0][0] | fN[1][0] | fN[2][0] | fN[3][0] | id[0][0] | id[1][0] | id[2][0] | id[3][0] | x[0] | x[1] | errx[0][0] | errx[0][1] | errx[1][0] | errx[1][1] | errx[2][0] | errx[2][1] | errx[3][0] | errx[3][1] | g[0][0] | g[0][1] | g[1][0] | g[1][1] | g[2][0] | g[2][1] | g[3][0] | g[3][1] | ellip[0][0] | ellip[1][0] | ellip[2][0] | ellip[3][0] | flux[0][0] | flux[1][0] | flux[2][0] | flux[3][0] | radius[0][0] | radius[1][0] | radius[2][0] | radius[3][0] | mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | gm[0] | gm[1] | gc[0] | gc[1] | gmd[0] | gmd[1] | gcd[0] | gcd[1] | g_sq | gmd_sq | gm_sq | gc_sq | mag_mono | mag_chro | g_sq_bins | radius_mono | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 0 | 0 | 0 | 0 | 4146 | 3989 | 3883 | 3877 | 3319.95920 | 1157.81300 | 0.1221 | 0.1177 | 0.1237 | 0.1226 | 0.1227 | 0.1175 | 0.1268 | 0.1244 | 0.2984 | 0.7404 | 0.5328 | -1.0938 | 0.2935 | 0.7235 | 0.2716 | -0.6233 | 0.798270 | 1.216665 | 0.780765 | 0.679904 | 36835.1330 | 37383.7480 | 36986.8370 | 37253.1110 | 3.852369 | 3.893983 | 3.854637 | 3.895793 | -11.415656 | -11.431707 | -11.420118 | -11.427906 | 0.4156 | -0.17670 | 0.28255 | 0.05010 | -0.1172 | 0.91710 | 0.01095 | 0.67340 | 0.637235 | 0.854808 | 0.203946 | 0.082345 | -11.423682 | -11.424012 | (0.84, 0.88] | 3.873176 |
| 3 | 0 | 0 | 0 | 0 | 5262 | 5123 | 4921 | 4927 | 108.63672 | 2092.84060 | 0.3938 | 0.4327 | 0.4012 | 0.4276 | 0.3952 | 0.4332 | 0.4085 | 0.4268 | -0.2016 | 0.4174 | 0.7200 | -1.2491 | -0.2038 | 0.4063 | 0.0783 | -0.2840 | 0.463536 | 1.441753 | 0.454548 | 0.294596 | 8856.7761 | 8874.4106 | 8826.0475 | 8848.2706 | 3.494549 | 3.502561 | 3.488882 | 3.496959 | -9.868189 | -9.870349 | -9.864416 | -9.867146 | 0.2592 | -0.41585 | -0.06275 | 0.06115 | -0.4608 | 0.83325 | -0.14105 | 0.34515 | 0.214865 | 0.906642 | 0.240116 | 0.007677 | -9.869269 | -9.865781 | (0.88, 0.92] | 3.498555 |
| 4 | 0 | 0 | 0 | 0 | 4222 | 4082 | 3925 | 3921 | 1489.66340 | 1216.17590 | 0.4635 | 0.4180 | 0.4022 | 0.4707 | 0.4664 | 0.4179 | 0.4069 | 0.4755 | 0.8425 | 0.2209 | -1.0517 | -1.4290 | 0.8048 | 0.2047 | -0.5086 | -0.6103 | 0.870978 | 1.774292 | 0.830425 | 0.794443 | 8351.7254 | 8324.8939 | 8445.7476 | 8476.5524 | 3.480478 | 3.458537 | 3.505148 | 3.501773 | -9.804441 | -9.800947 | -9.816595 | -9.820548 | -0.1046 | -0.60405 | 0.14810 | -0.20280 | 0.9471 | 0.82495 | 0.65670 | 0.40750 | 0.758603 | 1.577541 | 0.375818 | 0.063061 | -9.802694 | -9.818572 | (1.56, 1.6] | 3.469507 |
| 5 | 0 | 0 | 0 | 0 | 3704 | 3572 | 3487 | 3507 | 42.24975 | 905.77085 | 0.3016 | 0.3021 | 0.2832 | 0.3201 | 0.3023 | 0.3019 | 0.2865 | 0.3230 | 0.4066 | 0.2078 | -1.3324 | -0.4557 | 0.3359 | 0.2131 | -0.3613 | -0.0610 | 0.456623 | 1.408173 | 0.397794 | 0.366413 | 12258.0780 | 12242.1090 | 12202.5980 | 12274.5210 | 3.512466 | 3.510293 | 3.502338 | 3.517241 | -10.221056 | -10.219641 | -10.216131 | -10.222511 | -0.4629 | -0.12395 | -0.01270 | 0.07605 | 0.8695 | 0.33175 | 0.34860 | 0.13705 | 0.208504 | 0.866088 | 0.229640 | 0.005945 | -10.220349 | -10.219321 | (0.84, 0.88] | 3.511380 |
| 13 | 0 | 0 | 0 | 0 | 2934 | 2858 | 2796 | 2812 | 1558.77430 | 265.04665 | 1.7807 | 1.1846 | 1.1449 | 1.7173 | 1.7871 | 1.1834 | 1.1315 | 1.6661 | 0.9273 | 0.0129 | -1.0095 | 0.1388 | 0.9121 | 0.0176 | -0.8063 | 0.1116 | 0.927390 | 1.018997 | 0.912270 | 0.813987 | 5062.6477 | 5027.7626 | 5048.8434 | 5014.3198 | 4.120589 | 3.976149 | 4.111542 | 3.966985 | -9.260944 | -9.253437 | -9.257980 | -9.250530 | -0.0411 | 0.07585 | 0.05290 | 0.06460 | 0.9684 | -0.06295 | 0.85920 | -0.04700 | 0.860052 | 0.941761 | 0.007442 | 0.006972 | -9.257191 | -9.254255 | (0.92, 0.96] | 4.048369 |
For example, take points:
Lower line below the diagonal line
point on x-axis: x1,y1 = 10,0
point on y-axis: x2,y2 = 99,90
here 20,0,99,80 are bin number, their values are obtained from bins_dict
x1,y1 = bins_dict[10], bins_dict[0]
x2,y2 = bins_dict[99], bins_dict[90]
Equation of straight line:
y-y1 = y2-y1 * (x-x1)
-----
x2-x1
boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
N = 100
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
bins_dict = {i:v for i,v in enumerate(bins)}
# points from contour plot
x1y1x2y2 = [10,0,99,90]
print(x1y1x2y2)
# actual values of x1, y1, x2, and y2
x1,y1 = bins_dict[x1y1x2y2[0]], bins_dict[x1y1x2y2[1]]
x2,y2 = bins_dict[x1y1x2y2[2]], bins_dict[x1y1x2y2[3]]
df['above'] = df.eval(" ( (@x2-@x1) * gmd_sq ) >= ( (@y2-@y1) * (g_sq - @x1 )) ")
print(df['above'].value_counts())
print()
print(df['above'].value_counts(normalize=True))
[10, 0, 99, 90] True 2457725 False 797868 Name: above, dtype: int64 True 0.754924 False 0.245076 Name: above, dtype: float64
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='b',label='Above')
nabove = len(df[df.above==True])
nbelow = len(df[df.above==False])
above_pct = nabove/(nabove+nbelow)*100
below_pct = 100-above_pct
text = f'Above = {nabove:,} ({above_pct:,.0f}%)\nBelow = {nbelow:,} ( {below_pct:,.0f}%)'
plt.text(0.5,2_000,text,fontsize=14)
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms for above and below cases', fontsize=20)
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Text(0.5, 1.0, 'gm_sq histograms below the boundary')
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5)
plt.xlabel('gm_sq')
plt.title('gm_sq histogram', fontsize=20)
Text(0.5, 1.0, 'gm_sq histogram')
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Text(0.5, 1.0, 'gm_sq histograms below the boundary')
rows_before = df.shape[0]
df = df[df.above==True]
print(f'Before rows: {rows_before}, After rows: {df.shape[0]}')
Before rows: 3255593, After rows: 2457725
plt.figure(figsize=(12,8))
sns.histplot(df['gm_sq'],label='gm_sq')
plt.legend()
plt.savefig('images/gmsq_histogram_after_cleaning.png',dpi=300)
Equation of straight line:
y-y1 = y2-y1 * (x-x1)
-----
x2-x1
boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
fig,ax = plt.subplots(1,2,figsize=(16,8))
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])
ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)
ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)
# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])
ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)
ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)
# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')
# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()
# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')
# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')
#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')
plt.show()
# Remove outliers for gc0 vs gm0
n = 10
sigma = (df['gm[0]'] - df['gc[0]']).std()
d = n * sigma
c = d/np.sqrt(2)
cond_upper = (df['gc[0]'] - df['gm[0]'] <= c)
cond_below = (df['gm[0]'] - df['gc[0]'] <= c)
df = df[ cond_upper & cond_below ]
# df.plot.scatter(x='gm[0]',y='gc[0]')
# Remove outliers for gc1 vs gm1
n = 10
sigma = (df['gm[1]'] - df['gc[1]']).std()
d = n * sigma
c = d/np.sqrt(2)
cond_upper = (df['gc[1]'] - df['gm[1]'] <= c)
cond_below = (df['gm[1]'] - df['gc[1]'] <= c)
df = df[ cond_upper & cond_below ]
# df.plot.scatter(x='gm[1]',y='gc[1]')
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])
ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)
ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)
# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')
# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()
# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')
# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')
#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')
plt.show()
df.filter(regex='mag').head(2)
| mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | mag_mono | mag_chro | |
|---|---|---|---|---|---|---|
| 0 | -14.026748 | -14.023355 | -14.029708 | -14.026460 | -14.025052 | -14.028084 |
| 1 | -9.275268 | -9.278627 | -9.263705 | -9.273412 | -9.276948 | -9.268558 |
df[['gm_sq','gc_sq']].describe()
| gm_sq | gc_sq | |
|---|---|---|
| count | 2.446826e+06 | 2.446826e+06 |
| mean | 8.648741e-02 | 6.313123e-02 |
| std | 1.482750e-01 | 1.307438e-01 |
| min | 5.000000e-09 | 2.500000e-09 |
| 25% | 9.274156e-03 | 6.070600e-03 |
| 50% | 2.927431e-02 | 1.842514e-02 |
| 75% | 9.363932e-02 | 5.898599e-02 |
| max | 1.989050e+00 | 3.448928e+00 |
def plot_bin_mag_mono_chro(nbins,show=False,ret=False):
import os
if not os.path.isdir('images'):
os.makedirs('images')
df['bins_mag_mono'] = pd.cut(df['mag_mono'],nbins)
df['bins_mag_chro'] = pd.cut(df['mag_chro'],nbins)
# show the text of bin counts
text_mono = df.groupby('bins_mag_mono')['gm_sq'].agg(['count', 'mean'])
text_mono = text_mono.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
text_mono = text_mono.to_string().replace('mean','gm_sq')
text_chro = df.groupby('bins_mag_chro')['gc_sq'].agg(['count', 'mean'])
text_chro = text_chro.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
text_chro = text_chro.to_string().replace('mean','gc_sq')
# data to plot
df_mono_gm_sq_mean = df.groupby('bins_mag_mono')['gm_sq'].mean()
df_chro_gm_sq_mean = df.groupby('bins_mag_chro')['gc_sq'].mean()
# plot
fig,ax = plt.subplots(1,2,figsize=(12,8))
# mono (plot the magnitude mean in each bins)
df_mono_gm_sq_mean.plot(marker='o',ax=ax[0])
ax[0].tick_params(axis='x', rotation=90)
ax[0].set_ylabel('gm_sq_mean',fontsize=18)
ax[0].set_xlabel('bin_mag_mono',fontsize=18)
ax[0].set_title(f'gm_sq per magnitude bins with nbins = {nbins}')
ax[0].text(0,0.5,text_mono,fontsize=14,va='center')
ax[0].set_ylim(0,1)
ax[0].set_yticks(np.arange(0, 1, step=0.1))
# chro
df_chro_gm_sq_mean.plot(marker='o',ax=ax[1])
ax[1].tick_params(axis='x', rotation=90)
ax[1].set_ylabel('gc_sq_mean',fontsize=18)
ax[1].set_xlabel('bin_mag_chro',fontsize=18)
ax[1].set_title(f'gc_sq per magnitude bins with nbins = {nbins}')
ax[1].text(0,0.5,text_chro,fontsize=14,va='center')
ax[1].set_ylim(0,1)
ax[1].set_yticks(np.arange(0, 1, step=0.1))
plt.tight_layout()
plt.savefig(f'images/bin_mag_mono_chro_{nbins}.png')
if show:
plt.show()
plt.close()
if ret:
return df_mono_gm_sq_mean, df_chro_gm_sq_mean
for nbins in range(5,15):
plot_bin_mag_mono_chro(nbins,show=True)
df['mag_mono'].describe()
count 2.446826e+06 mean -1.026726e+01 std 1.331125e+00 min -1.711182e+01 25% -1.101521e+01 50% -9.922035e+00 75% -9.214447e+00 max -6.758266e+00 Name: mag_mono, dtype: float64
df['mag_mono'].plot.hist(bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x7fa191ac6d10>
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.
num_start_increasing = 3
df_mono.index[num_start_increasing]
Interval(-13.661, -12.51, closed='right')
# how to choose the bottom of the slope mathematically?
# try to see pct change
df_mono.pct_change().to_frame('gm_sq_pct_change').style.background_gradient(low=0,high=0.9)
| gm_sq_pct_change | |
|---|---|
| bins_mag_mono | |
| (-17.122, -15.961] | nan |
| (-15.961, -14.811] | 0.539832 |
| (-14.811, -13.661] | -0.623929 |
| (-13.661, -12.51] | -0.126210 |
| (-12.51, -11.36] | 0.921237 |
| (-11.36, -10.209] | 0.618263 |
| (-10.209, -9.059] | 0.091997 |
| (-9.059, -7.909] | 0.309616 |
| (-7.909, -6.758] | 0.207275 |
# look at smaller part than
df_mono_left = df_mono.pct_change()\
.loc[df_mono.pct_change().index < df_mono.pct_change().idxmax()]
df_mono_left
bins_mag_mono (-17.122, -15.961] NaN (-15.961, -14.811] 0.539832 (-14.811, -13.661] -0.623929 (-13.661, -12.51] -0.126210 Name: gm_sq, dtype: float64
df_mono.idxmax().left, df_mono.idxmax().right
(-7.909, -6.758)
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_mono = [df_mono.index[num_start_increasing].left,
df_mono.index[num_start_increasing].right
]
mag_low_nbinsK_mono = np.mean(idx_bottom_of_slope_mono)
# index for peak of curve
idx_peak_mono = [df_mono.idxmax().left,
df_mono.idxmax().right
]
mag_high_nbinsK_mono = np.mean(idx_peak_mono)
idx_bottom_of_slope_mono, idx_peak_mono, mag_low_nbinsK_mono, mag_high_nbinsK_mono
([-13.661, -12.51], [-7.909, -6.758], -13.0855, -7.3335)
from scipy.optimize import curve_fit
xcol = 'mag_mono'
ycol = 'gm_sq'
x = df.query(""" @mag_low_nbinsK_mono < mag_mono < @mag_high_nbinsK_mono """)[xcol].to_numpy()
y = df.query(""" @mag_low_nbinsK_mono < mag_mono < @mag_high_nbinsK_mono """)[ycol].to_numpy()
def func(x, a, b):
return a*x + b
params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)
print(f'magnitude ranges for mono: {mag_low_nbinsK_mono}, {mag_high_nbinsK_mono}')
print(f'fitting params for mono: {a}, {b}' )
magnitude ranges for mono: -13.0855, -7.3335 fitting params for mono: 0.02, 0.29
mag_low_nbinsK_mono
-13.0855
def magnitude_weight_mono(mag):
if mag < mag_low_nbinsK_mono:
return 1/ (mag_low_nbinsK_mono *a + b)
else:
return 1/ (a*mag + b)
df['wt_mag_mono'] = df['mag_mono'].apply(magnitude_weight_mono)
df['wt_mag_mono'] = df['wt_mag_mono'] / df['wt_mag_mono'].mean() # normalize by mean
df['mag_chro'].plot.hist(bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x7fa190a7ab90>
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.
num_start_increasing = 3
df_chro.index[num_start_increasing]
Interval(-14.075, -12.813, closed='right')
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_chro = [df_chro.index[num_start_increasing].left,
df_chro.index[num_start_increasing].right
]
mag_low_nbinsK_chro = np.mean(idx_bottom_of_slope_chro)
# index for peak of curve
idx_peak_chro = [df_chro.idxmax().left,
df_chro.idxmax().right
]
mag_high_nbinsK_chro = np.mean(idx_peak_chro)
idx_bottom_of_slope_chro, idx_peak_chro, mag_low_nbinsK_chro, mag_high_nbinsK_chro
([-14.075, -12.813], [-7.764, -6.502], -13.443999999999999, -7.133)
from scipy.optimize import curve_fit
xcol = 'mag_chro'
ycol = 'gc_sq'
x = df.query(""" @mag_low_nbinsK_chro < mag_chro < @mag_high_nbinsK_chro """)[xcol].to_numpy()
y = df.query(""" @mag_low_nbinsK_chro < mag_chro < @mag_high_nbinsK_chro """)[ycol].to_numpy()
def func(x, a, b):
return a*x + b
params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)
print(f'magnitude ranges for chro: {mag_low_nbinsK_chro}, {mag_high_nbinsK_chro}')
print(f'fitting params for chro: {a}, {b}' )
magnitude ranges for chro: -13.443999999999999, -7.133 fitting params for chro: 0.02, 0.22
mag_low_nbinsK_chro
-13.443999999999999
def magnitude_weight_chro(mag):
if mag < mag_low_nbinsK_chro:
return 1/ (mag_low_nbinsK_chro*a + b)
else:
return 1/ (a*mag + b)
df['wt_mag_chro'] = df['mag_chro'].apply(magnitude_weight_chro)
df['wt_mag_chro'] = df['wt_mag_chro'] / df['wt_mag_chro'].mean() # normalize by mean
# mean
df['wt_mag'] = (df['wt_mag_mono'] + df['wt_mag_chro']) / 2
# df.drop(['wt_mag_chro','wt_mag_mono'],axis=1,inplace=True)
df.iloc[:2,-7:]
| g_sq_bins | above | bins_mag_mono | bins_mag_chro | wt_mag_mono | wt_mag_chro | wt_mag | |
|---|---|---|---|---|---|---|---|
| 0 | (0.04, 0.08] | True | (-14.811, -13.661] | (-14.075, -12.813] | 2.594100 | -9.885015 | -3.645458 |
| 1 | (0.2, 0.24] | True | (-10.209, -9.059] | (-10.289, -9.026] | 0.702531 | 13.953099 | 7.327815 |
c2 = (dx * dx - dy * dy) / (r * r);
s2 = 2 * dx * dy / (r * r);
eX = s2 * e[0] + c2 * e[1];
eesum += eX * eX * w[0] * w[0];
eTsum[bin] -= (c2 * e[0] + s2 * e[1]) * w[0];
df.head(2)
| fN[0][0] | fN[1][0] | fN[2][0] | fN[3][0] | id[0][0] | id[1][0] | id[2][0] | id[3][0] | x[0] | x[1] | errx[0][0] | errx[0][1] | errx[1][0] | errx[1][1] | errx[2][0] | errx[2][1] | errx[3][0] | errx[3][1] | g[0][0] | g[0][1] | g[1][0] | g[1][1] | g[2][0] | g[2][1] | g[3][0] | g[3][1] | ellip[0][0] | ellip[1][0] | ellip[2][0] | ellip[3][0] | flux[0][0] | flux[1][0] | flux[2][0] | flux[3][0] | radius[0][0] | radius[1][0] | radius[2][0] | radius[3][0] | mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | gm[0] | gm[1] | gc[0] | gc[1] | gmd[0] | gmd[1] | gcd[0] | gcd[1] | g_sq | gmd_sq | gm_sq | gc_sq | mag_mono | mag_chro | g_sq_bins | above | bins_mag_mono | bins_mag_chro | wt_mag_mono | wt_mag_chro | wt_mag | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 2135 | 2096 | 2067 | 2082 | 2580.9311 | 2849.5868 | 0.0297 | 0.0319 | 0.0137 | 0.0134 | 0.0299 | 0.0319 | 0.0303 | 0.0306 | -0.0952 | -0.2551 | 0.1531 | 0.1393 | -0.0939 | -0.2453 | 0.1209 | 0.1211 | 0.272285 | 0.206988 | 0.262658 | 0.171120 | 408036.5500 | 406763.6500 | 409150.4800 | 407928.3600 | 4.235435 | 4.206352 | 4.273318 | 4.244144 | -14.026748 | -14.023355 | -14.029708 | -14.026460 | 0.02895 | -0.0579 | 0.0135 | -0.06210 | -0.12415 | -0.1972 | -0.1074 | -0.18320 | 0.074139 | 0.054301 | 0.004191 | 0.004039 | -14.025052 | -14.028084 | (0.04, 0.08] | True | (-14.811, -13.661] | (-14.075, -12.813] | 2.594100 | -9.885015 | -3.645458 |
| 1 | 0 | 0 | 0 | 0 | 4794 | 4601 | 4395 | 4401 | 2003.4769 | 1691.9412 | 0.7515 | 0.7892 | 1.0306 | 1.5368 | 0.7687 | 0.7990 | 1.0311 | 1.5089 | 0.0423 | 0.1264 | -0.9250 | -0.0098 | 0.0498 | 0.1351 | -0.7656 | 0.0058 | 0.133290 | 0.925052 | 0.143986 | 0.765622 | 5129.8816 | 5145.7756 | 5075.5358 | 5121.1177 | 3.591389 | 3.740641 | 3.593138 | 3.757837 | -9.275268 | -9.278627 | -9.263705 | -9.273412 | -0.44135 | 0.0583 | -0.3579 | 0.07045 | 0.48365 | 0.0681 | 0.4077 | 0.06465 | 0.017766 | 0.238555 | 0.198189 | 0.133056 | -9.276948 | -9.268558 | (0.2, 0.24] | True | (-10.209, -9.059] | (-10.289, -9.026] | 0.702531 | 13.953099 | 7.327815 |
# constants
RMIN = 10
DLNR = 0.5
df['dx'] = df['x[0]'] - 1699 # jesisim output fitsfiles have shape 3398, 3398
df['dy'] = df['x[1]'] - 1699
df['r'] = np.hypot(df['dx'], df['dy'])
# df['r'] = np.sqrt(df['dx']**2 + df['dy']**2)
df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
df['sin2theta'] = df.eval(' (2 * dx * dy ) / (r * r)' )
df['bin'] = ( np.log(df.r / RMIN) / DLNR).astype(int)
df['bin'].value_counts()
9 969494 10 887481 8 389214 7 147082 6 45662 5 4581 3 2048 2 672 4 344 1 161 0 75 -1 8 -2 3 -3 1 Name: bin, dtype: int64
df['eX_mono'] = df['sin2theta'] * df['gm[0]'] + df['cos2theta'] * df['gm[1]']
df['eT_mono'] = -1 * (df['cos2theta'] * df['gm[0]'] + df['sin2theta'] * df['gm[1]'] )
df['eX_chro'] = df['sin2theta'] * df['gc[0]'] + df['cos2theta'] * df['gc[1]']
df['eT_chro'] = -1 * (df['cos2theta'] * df['gc[0]'] + df['sin2theta'] * df['gc[1]'] )
df['eT_mono_times_wt'] = df['eT_mono'] * df['wt_mag']
df['eT_chro_times_wt'] = df['eT_chro'] * df['wt_mag']
df['eX_monosq_times_wt'] = df['eX_mono'] * df['eX_mono'] * df['wt_mag']
df['eX_chrosq_times_wt'] = df['eX_chro'] * df['eX_chro'] * df['wt_mag']
df['eXsq_times_wt'] = df.eval(" (eX_monosq_times_wt + eX_chrosq_times_wt) / 2 ")
df.iloc[:2,-6:]
| eT_chro | eT_mono_times_wt | eT_chro_times_wt | eX_monosq_times_wt | eX_chrosq_times_wt | eXsq_times_wt | |
|---|---|---|---|---|---|---|
| 0 | 0.063475 | -0.231243 | -0.231395 | -0.006740 | -0.003102 | -0.004921 |
| 1 | 0.360780 | 3.250454 | 2.643731 | 0.045375 | 0.055413 | 0.050394 |
https://www.phenix.bnl.gov/WWW/publish/elke/EIC/Files-for-Wiki/lara.02-008.errors.pdf
When the statistics involved in calculating $E_A$ and $E_B$ are not indendent, the error for a function $f(E_A, E_B)$ has the expression: $$ \sigma_{f}=\sqrt{\left(\frac{\partial f}{\partial E_{A}} \sigma_{A}\right)^{2}+\left(\frac{\partial f}{\partial E_{B}} \sigma_{B}\right)^{2}+2 \frac{\partial f}{\partial E_{A}} \frac{\partial f}{\partial E_{B}} \operatorname{cov}\left(E_{A}, E_{B}\right)} $$
where the last term takes care of the correlations between $E_A$ and $E_B$. Given a large number $N$ of measurements $E_{A_i}$, the standard deviation $\sigma_A$ is empirically defined as:
$$ \sigma_{A}^{2}=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)^{2} $$while the covariance between $E_A$ and $E_B$ is given by: $$ \operatorname{cov}\left(E_{A}, E_{B}\right)=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)\left(E_{B_{i}}-E_{B}\right) $$
where $E_A$ and $E_B$ are the averages of $E_{A_i}$ and $E_{B_i}$. When $E_A$ and $E_B$ are independent, over a large number $N$ of measurements they will fluctuate around their average in an uncorrelated way, so that the covariance is zero and one recovers the usual formula for the propagation of errors in a function of independent variables.
"""
f = m/c
df/dm = 1/c
df/dc = -m/c2
s2 ==> sigma
s2 = r2 (sm2/m2 + sc2/c2 -2/m/c cov(m,c))
put m=c,
s2 = 2/m2 (sm2 - cov)
error = sigma/sqrt(n)
error =
0.000332
sqrt --------
eT(bin)**2
---------------------
sqrt(ngals_bin)
""";
cov = df[['eT_chro','eT_mono']].cov()
cov
| eT_chro | eT_mono | |
|---|---|---|
| eT_chro | 0.030666 | 0.027496 |
| eT_mono | 0.027496 | 0.042076 |
diag_mean = np.diag(cov).mean()
diag_mean
0.03637056646471716
offdiag = cov.iloc[0,1]
offdiag
0.027495518003083506
diag_mean - offdiag
0.008875048461633655
myerr = (diag_mean - offdiag) * 2
myerr
0.01775009692326731
# temporary value to calculate error
# err_coeff = 0.000332
err_coeff = myerr
df['err_numerator'] = myerr # np.sqrt(err_coeff/df['eT_mono']/df['eT_mono'])
df['eX_monosq_times_wt_std'] = df['eX_monosq_times_wt'].std()
df['eX_chrosq_times_wt_std'] = df['eX_chrosq_times_wt'].std()
df['eT_ratio'] = df['eT_mono'] / df['eT_chro']
df['eT_ratio_std'] = df['eT_ratio'].std()
df.iloc[:2,-5:]
| err_numerator | eX_monosq_times_wt_std | eX_chrosq_times_wt_std | eT_ratio | eT_ratio_std | |
|---|---|---|---|---|---|
| 0 | 0.01775 | 276.190868 | 320.208299 | 0.999345 | 232.5795 |
| 1 | 0.01775 | 276.190868 | 320.208299 | 1.229495 | 232.5795 |
# constants
RMIN = 10
DLNR = 0.5
# renaming aggegration needs pandas > 0.25
pd.__version__
'1.1.4'
df.filter(regex='times|err_numerator').head().round(1)
| eT_mono_times_wt | eT_chro_times_wt | eX_monosq_times_wt | eX_chrosq_times_wt | eXsq_times_wt | err_numerator | eX_monosq_times_wt_std | eX_chrosq_times_wt_std | |
|---|---|---|---|---|---|---|---|---|
| 0 | -0.2 | -0.2 | -0.0 | -0.0 | -0.0 | 0.0 | 276.2 | 320.2 |
| 1 | 3.3 | 2.6 | 0.0 | 0.1 | 0.1 | 0.0 | 276.2 | 320.2 |
| 2 | 12.2 | 5.5 | -4.3 | -0.5 | -2.4 | 0.0 | 276.2 | 320.2 |
| 3 | -4.7 | 0.9 | 2.6 | 0.1 | 1.4 | 0.0 | 276.2 | 320.2 |
| 4 | 3.9 | 2.6 | 1.2 | 0.6 | 0.9 | 0.0 | 276.2 | 320.2 |
df_radial_bins = df.groupby('bin').agg(
r_mean = ('r', 'mean'),
wt_mag_sum = ('wt_mag', 'sum'),
eT_mono_times_wt_sum = ('eT_mono_times_wt', 'sum'),
eT_chro_times_wt_sum = ('eT_chro_times_wt', 'sum'),
eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
err_numerator_mean = ('err_numerator', 'mean'))
# bin counts
df_radial_bins['bin_count'] = df['bin'].value_counts()
# columns after binning
df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eX_mean_mono'] = np.sqrt(df_radial_bins['eX_monosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eX_mean_chro'] = np.sqrt(df_radial_bins['eX_chrosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eT_ratio'] = (df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro'])
# Errors
sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro']) /
sqrt_bin)
print('Statistics for different radial bins')
print(f'RMIN = {RMIN} and DLNR = {DLNR}')
df_radial_bins.style\
.background_gradient(subset=['eT_ratio_err'],cmap='Blues')
Statistics for different radial bins RMIN = 10 and DLNR = 0.5
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/pandas/core/series.py:726: RuntimeWarning: invalid value encountered in sqrt
| r_mean | wt_mag_sum | eT_mono_times_wt_sum | eT_chro_times_wt_sum | eX_monosq_times_wt_sum | eX_chrosq_times_wt_sum | err_numerator_mean | bin_count | eT_mean_mono | eT_mean_chro | eX_mean_mono | eX_mean_chro | eT_ratio | eT_mono_err | eT_chro_err | eT_ratio_err | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| bin | ||||||||||||||||
| -3 | 1.948903 | 11.375522 | -5.891466 | -5.661695 | 4.142085 | 3.855327 | 0.017750 | 1 | -0.517907 | -0.497709 | 0.603426 | 0.582163 | 1.040584 | 0.603426 | 0.582163 | 0.262414 |
| -2 | 2.947633 | 54.597337 | 4.673825 | 3.214389 | 12.056320 | 9.394004 | 0.017750 | 3 | 0.085605 | 0.058874 | 0.469918 | 0.414801 | 1.454032 | 0.271307 | 0.239485 | 1.083492 |
| -1 | 4.970653 | 41.732651 | -15.918771 | -12.038013 | 11.756387 | 8.747870 | 0.017750 | 8 | -0.381446 | -0.288455 | 0.530761 | 0.457839 | 1.322375 | 0.187652 | 0.161871 | 0.142004 |
| 0 | 11.604925 | 1059.034083 | -25.333227 | -24.859748 | 165.052955 | 116.920270 | 0.017750 | 75 | -0.023921 | -0.023474 | 0.394781 | 0.332269 | 1.019046 | 0.045585 | 0.038367 | 0.649211 |
| 1 | 22.493970 | -3.868282 | 546.613962 | 340.353898 | 99.846771 | 81.729154 | 0.017750 | 161 | -141.306630 | -87.985792 | nan | nan | 1.606016 | nan | nan | 0.000094 |
| 2 | 37.918810 | 1731.907748 | 1482.865500 | 1031.514353 | 89.445519 | 348.492574 | 0.017750 | 672 | 0.856204 | 0.595594 | 0.227257 | 0.448574 | 1.437562 | 0.008767 | 0.017304 | 0.007197 |
| 3 | 58.365524 | -617700.777490 | -331604.478282 | -327201.043000 | -1460.152432 | -3418.517108 | 0.017750 | 2048 | 0.536837 | 0.529708 | 0.048619 | 0.074393 | 1.013458 | 0.001074 | 0.001644 | 0.005521 |
| 4 | 81.771466 | -4960.735963 | -2893.667327 | -1730.994063 | -666.303293 | -568.567225 | 0.017750 | 344 | 0.583314 | 0.348939 | 0.366491 | 0.338546 | 1.671680 | 0.019760 | 0.018253 | 0.015922 |
| 5 | 179.806942 | 17251.687587 | 7492.602297 | 5765.842584 | 1632.798508 | 1238.313992 | 0.017750 | 4581 | 0.434311 | 0.334219 | 0.307645 | 0.267917 | 1.299481 | 0.004545 | 0.003958 | 0.005167 |
| 6 | 277.107102 | 543182.190770 | 269023.153491 | 229977.564759 | 118009.655740 | 84996.135574 | 0.017750 | 45662 | 0.495272 | 0.423389 | 0.466107 | 0.395573 | 1.169780 | 0.002181 | 0.001851 | 0.001362 |
| 7 | 447.811099 | -39354.071999 | -63420.937027 | -175877.912421 | 214.982462 | -50595.081756 | 0.017750 | 147082 | 1.611547 | 4.469116 | nan | 1.133860 | 0.360596 | nan | 0.002957 | 0.000129 |
| 8 | 735.318261 | 7238533.676309 | 1056377.818268 | 883105.804308 | 274717.188316 | 232305.707735 | 0.017750 | 389214 | 0.145938 | 0.122001 | 0.194813 | 0.179145 | 1.196208 | 0.000312 | 0.000287 | 0.001600 |
| 9 | 1211.025659 | -10320754.110080 | -2877988.076366 | -2274344.672726 | -286503.886284 | -323150.200053 | 0.017750 | 969494 | 0.278854 | 0.220366 | 0.166613 | 0.176948 | 1.265414 | 0.000169 | 0.000180 | 0.000546 |
| 10 | 1741.222665 | 5627733.361806 | 71164.440166 | 207613.432978 | 284507.018714 | 215361.243866 | 0.017750 | 887481 | 0.012645 | 0.036891 | 0.224843 | 0.195622 | 0.342774 | 0.000239 | 0.000208 | 0.006548 |
# why some eT values are -ve?
"""
1. For given rmin and dlnr we have some bins very few object counts.
""";
pd.cut(df['eT_mono'],20).value_counts().to_frame().style.background_gradient()
| eT_mono | |
|---|---|
| (-0.0178, 0.121] | 1007431 |
| (0.121, 0.261] | 582243 |
| (-0.157, -0.0178] | 295737 |
| (0.261, 0.4] | 211191 |
| (-0.296, -0.157] | 105730 |
| (0.4, 0.539] | 89527 |
| (-0.435, -0.296] | 49463 |
| (0.539, 0.678] | 38649 |
| (-0.575, -0.435] | 26555 |
| (0.678, 0.817] | 14032 |
| (-0.714, -0.575] | 12359 |
| (0.817, 0.957] | 4704 |
| (-0.853, -0.714] | 4350 |
| (0.957, 1.096] | 1764 |
| (-0.992, -0.853] | 1554 |
| (-1.131, -0.992] | 609 |
| (1.096, 1.235] | 589 |
| (-1.271, -1.131] | 197 |
| (1.235, 1.374] | 114 |
| (-1.413, -1.271] | 28 |
fig,ax = plt.subplots(figsize=(12,8))
sns.distplot(df['eT_mono'],ax=ax)
plt.title('Histogram and density plot of eT_mono');
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/seaborn/distributions.py:2551: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
df['wt_mag'].hist(bins=100,figsize=(12,8));
plt.title('Histogram of wt_mag')
plt.xlabel('wt_mag')
plt.ylabel('Frequency');
plt.savefig('images/weights.png',dpi=300)
df.query(""" r>500 """)['eT_mono'].describe().to_frame()
| eT_mono | |
|---|---|
| count | 2.284348e+06 |
| mean | 7.756297e-02 |
| std | 1.968697e-01 |
| min | -1.409842e+00 |
| 25% | 2.728620e-03 |
| 50% | 7.973827e-02 |
| 75% | 1.631570e-01 |
| max | 1.374172e+00 |
df['gm_sq'].hist(bins=100,figsize=(12,8))
plt.title('Histogram of gm_sq')
plt.ylabel('Frequency')
plt.xlabel('gm_sq')
plt.savefig('images/gm_sq_hist_after_cleaning.png',dpi=300)
df.head(2)
| fN[0][0] | fN[1][0] | fN[2][0] | fN[3][0] | id[0][0] | id[1][0] | id[2][0] | id[3][0] | x[0] | x[1] | errx[0][0] | errx[0][1] | errx[1][0] | errx[1][1] | errx[2][0] | errx[2][1] | errx[3][0] | errx[3][1] | g[0][0] | g[0][1] | g[1][0] | g[1][1] | g[2][0] | g[2][1] | g[3][0] | g[3][1] | ellip[0][0] | ellip[1][0] | ellip[2][0] | ellip[3][0] | flux[0][0] | flux[1][0] | flux[2][0] | flux[3][0] | radius[0][0] | radius[1][0] | radius[2][0] | radius[3][0] | mag[0][0] | mag[1][0] | mag[2][0] | mag[3][0] | gm[0] | gm[1] | gc[0] | gc[1] | gmd[0] | gmd[1] | gcd[0] | gcd[1] | g_sq | gmd_sq | gm_sq | gc_sq | mag_mono | mag_chro | g_sq_bins | above | bins_mag_mono | bins_mag_chro | wt_mag_mono | wt_mag_chro | wt_mag | dx | dy | r | cos2theta | sin2theta | bin | eX_mono | eT_mono | eX_chro | eT_chro | eT_mono_times_wt | eT_chro_times_wt | eX_monosq_times_wt | eX_chrosq_times_wt | eXsq_times_wt | err_numerator | eX_monosq_times_wt_std | eX_chrosq_times_wt_std | eT_ratio | eT_ratio_std | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 0 | 2135 | 2096 | 2067 | 2082 | 2580.9311 | 2849.5868 | 0.0297 | 0.0319 | 0.0137 | 0.0134 | 0.0299 | 0.0319 | 0.0303 | 0.0306 | -0.0952 | -0.2551 | 0.1531 | 0.1393 | -0.0939 | -0.2453 | 0.1209 | 0.1211 | 0.272285 | 0.206988 | 0.262658 | 0.171120 | 408036.5500 | 406763.6500 | 409150.4800 | 407928.3600 | 4.235435 | 4.206352 | 4.273318 | 4.244144 | -14.026748 | -14.023355 | -14.029708 | -14.026460 | 0.02895 | -0.0579 | 0.0135 | -0.06210 | -0.12415 | -0.1972 | -0.1074 | -0.18320 | 0.074139 | 0.054301 | 0.004191 | 0.004039 | -14.025052 | -14.028084 | (0.04, 0.08] | True | (-14.811, -13.661] | (-14.075, -12.813] | 2.594100 | -9.885015 | -3.645458 | 881.9311 | 1150.5868 | 1449.707712 | -0.259818 | 0.965658 | 9 | 0.042999 | 0.063433 | 0.029171 | 0.063475 | -0.231243 | -0.231395 | -0.006740 | -0.003102 | -0.004921 | 0.01775 | 276.190868 | 320.208299 | 0.999345 | 232.5795 |
| 1 | 0 | 0 | 0 | 0 | 4794 | 4601 | 4395 | 4401 | 2003.4769 | 1691.9412 | 0.7515 | 0.7892 | 1.0306 | 1.5368 | 0.7687 | 0.7990 | 1.0311 | 1.5089 | 0.0423 | 0.1264 | -0.9250 | -0.0098 | 0.0498 | 0.1351 | -0.7656 | 0.0058 | 0.133290 | 0.925052 | 0.143986 | 0.765622 | 5129.8816 | 5145.7756 | 5075.5358 | 5121.1177 | 3.591389 | 3.740641 | 3.593138 | 3.757837 | -9.275268 | -9.278627 | -9.263705 | -9.273412 | -0.44135 | 0.0583 | -0.3579 | 0.07045 | 0.48365 | 0.0681 | 0.4077 | 0.06465 | 0.017766 | 0.238555 | 0.198189 | 0.133056 | -9.276948 | -9.268558 | (0.2, 0.24] | True | (-10.209, -9.059] | (-10.289, -9.026] | 0.702531 | 13.953099 | 7.327815 | 304.4769 | -7.0588 | 304.558712 | 0.998926 | -0.046342 | 6 | 0.078690 | 0.443578 | 0.086960 | 0.360780 | 3.250454 | 2.643731 | 0.045375 | 0.055413 | 0.050394 | 0.01775 | 276.190868 | 320.208299 | 1.229495 | 232.5795 |
def plot_eT_mean(rmin, dlnr,min_bin_count,
show_ratio=True,
show_mono_chro=False,
show_data=True):
# title
title = f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
# radial bin
df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)
# aggregations per given bin
df_radial_bins = df.groupby('bin').agg(
r_mean = ('r', 'mean'),
wt_mag_sum = ('wt_mag', 'sum'),
eT_mono_times_wt_sum = ('eT_mono_times_wt', 'sum'),
eT_chro_times_wt_sum = ('eT_chro_times_wt', 'sum'),
eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
err_numerator_mean = ('err_numerator', 'mean')
)
# bin counts
df_radial_bins['bin_count'] = df['bin'].value_counts()
df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """)
df_radial_bins = df_radial_bins.iloc[:-1,:]
# columns after binning
df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eX_mean_mono'] = np.sqrt(
df_radial_bins['eX_monosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum']
)
df_radial_bins['eX_mean_chro'] = np.sqrt(
df_radial_bins['eX_chrosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum']
)
df_radial_bins['eT_ratio'] = (df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro'])
# Errors
sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro']) /
sqrt_bin)
# also plot eT mono and eT chro
if show_ratio:
fig, ax = plt.subplots(1,1,figsize=(12,8))
df_radial_bins.plot.line(x='r_mean',y='eT_ratio',
yerr='eT_ratio_err',
marker='o',color='b',ax=ax)
ax.set_xlabel('Radius',fontsize=18)
ax.set_ylabel(r'$\frac{eT_{mono}}{eT_{chro}}$',fontsize=18)
plt.ylim(0.95, 1.0)
if show_mono_chro:
fig, ax = plt.subplots(3,1,figsize=(14,12))
# top
df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
yerr='eT_mono_err',
marker='o',color='r', ax=ax[0])
# top
df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
yerr='eT_chro_err',
marker='o',color='b',ax=ax[0])
# middle
df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
yerr='eT_mono_err',
marker='o',color='r', ax=ax[1])
# bottom
df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
yerr='eT_chro_err',
marker='o',color='b',ax=ax[2])
# label
ax[0].set_xlabel('')
ax[1].set_xlabel('')
ax[2].set_xlabel('Radius',fontsize=18)
ax[0].set_ylabel('eT',fontsize=18)
ax[1].set_ylabel('eT_mean_mono',fontsize=18)
ax[2].set_ylabel('eT_mean_chro',fontsize=18)
ax[0].set_xlim(0,1800)
ax[1].set_xlim(0,1800)
ax[2].set_xlim(0,1800)
plt.suptitle(f'Plot of eT for {title}',fontsize=24,weight='bold')
# also show the dataframe
if show_data:
display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
plt.xlim(0,1800)
plt.savefig('images/eT_versus_radius.png',dpi=300)
plt.tight_layout()
plt.show()
# plot now
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
show_ratio=True,show_mono_chro=False)
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/pandas/core/series.py:726: RuntimeWarning: invalid value encountered in sqrt
| r_mean | wt_mag_sum | eT_mono_times_wt_sum | eT_chro_times_wt_sum | eX_monosq_times_wt_sum | eX_chrosq_times_wt_sum | err_numerator_mean | bin_count | eT_mean_mono | eT_mean_chro | eX_mean_mono | eX_mean_chro | eT_ratio | eT_mono_err | eT_chro_err | eT_ratio_err | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| bin | ||||||||||||||||
| -7 | 7.419411 | 223.498125 | 34.318911 | 41.180904 | 25.147389 | 23.248396 | 0.017750 | 19 | 0.153553 | 0.184256 | 0.335436 | 0.322522 | 0.833370 | 0.076954 | 0.073992 | 0.181712 |
| -6 | 11.816355 | 848.726375 | -118.100697 | -122.688828 | 147.494507 | 101.828541 | 0.017750 | 44 | -0.139150 | -0.144556 | 0.416873 | 0.346378 | 0.962604 | 0.062846 | 0.052219 | 0.141616 |
| -5 | 20.508234 | -67.429432 | 565.977666 | 369.903879 | 66.292368 | 53.479376 | 0.017750 | 128 | -8.393629 | -5.485793 | nan | nan | 1.530067 | nan | nan | 0.001735 |
| -4 | 34.300235 | 878.372384 | 1273.630422 | 885.458177 | -21.112088 | 219.574148 | 0.017750 | 481 | 1.449989 | 1.008067 | nan | 0.499978 | 1.438386 | nan | 0.022797 | 0.005025 |
| -3 | 54.146573 | -613228.767281 | -331328.414934 | -327890.508187 | -1156.531908 | -3334.858389 | 0.017750 | 1917 | 0.540301 | 0.534695 | 0.043428 | 0.073744 | 1.010485 | 0.000992 | 0.001684 | 0.005661 |
| -2 | 75.590545 | -6427.163953 | -1264.832913 | 732.533939 | -585.518324 | -274.396940 | 0.017750 | 709 | 0.196795 | -0.113975 | 0.301829 | 0.206624 | -1.726654 | 0.011335 | 0.007760 | nan |
| -1 | 165.288307 | 18594.724267 | 11106.656189 | 9528.123506 | 4738.138691 | 4114.276322 | 0.017750 | 2118 | 0.597301 | 0.512410 | 0.504788 | 0.470383 | 1.165671 | 0.010968 | 0.010221 | 0.005233 |
| 0 | 372.435112 | 725254.590417 | 391601.740762 | 252990.290643 | 127178.757719 | 77571.464869 | 0.017750 | 152837 | 0.539951 | 0.348830 | 0.418757 | 0.327044 | 1.547892 | 0.001071 | 0.000837 | 0.000785 |
| 1 | 666.051083 | 6183060.248611 | 727663.546594 | 587697.762770 | 223957.351168 | 146085.385404 | 0.017750 | 323252 | 0.117687 | 0.095050 | 0.190318 | 0.153710 | 1.238159 | 0.000335 | 0.000270 | 0.002216 |
| 2 | 1096.386981 | -10327524.536444 | -2846449.667217 | -2354262.446732 | -298267.242895 | -332981.370410 | 0.017750 | 810184 | 0.275618 | 0.227960 | 0.169944 | 0.179561 | 1.209062 | 0.000189 | 0.000199 | 0.000591 |
| 3 | 1657.326482 | 6669539.589810 | 158865.453966 | 367390.431686 | 330295.323603 | 262911.213051 | 0.017750 | 1139272 | 0.023820 | 0.055085 | 0.222538 | 0.198544 | 0.432416 | 0.000208 | 0.000186 | 0.003446 |
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
show_ratio=False,show_mono_chro=True)
| r_mean | wt_mag_sum | eT_mono_times_wt_sum | eT_chro_times_wt_sum | eX_monosq_times_wt_sum | eX_chrosq_times_wt_sum | err_numerator_mean | bin_count | eT_mean_mono | eT_mean_chro | eX_mean_mono | eX_mean_chro | eT_ratio | eT_mono_err | eT_chro_err | eT_ratio_err | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| bin | ||||||||||||||||
| -7 | 7.419411 | 223.498125 | 34.318911 | 41.180904 | 25.147389 | 23.248396 | 0.017750 | 19 | 0.153553 | 0.184256 | 0.335436 | 0.322522 | 0.833370 | 0.076954 | 0.073992 | 0.181712 |
| -6 | 11.816355 | 848.726375 | -118.100697 | -122.688828 | 147.494507 | 101.828541 | 0.017750 | 44 | -0.139150 | -0.144556 | 0.416873 | 0.346378 | 0.962604 | 0.062846 | 0.052219 | 0.141616 |
| -5 | 20.508234 | -67.429432 | 565.977666 | 369.903879 | 66.292368 | 53.479376 | 0.017750 | 128 | -8.393629 | -5.485793 | nan | nan | 1.530067 | nan | nan | 0.001735 |
| -4 | 34.300235 | 878.372384 | 1273.630422 | 885.458177 | -21.112088 | 219.574148 | 0.017750 | 481 | 1.449989 | 1.008067 | nan | 0.499978 | 1.438386 | nan | 0.022797 | 0.005025 |
| -3 | 54.146573 | -613228.767281 | -331328.414934 | -327890.508187 | -1156.531908 | -3334.858389 | 0.017750 | 1917 | 0.540301 | 0.534695 | 0.043428 | 0.073744 | 1.010485 | 0.000992 | 0.001684 | 0.005661 |
| -2 | 75.590545 | -6427.163953 | -1264.832913 | 732.533939 | -585.518324 | -274.396940 | 0.017750 | 709 | 0.196795 | -0.113975 | 0.301829 | 0.206624 | -1.726654 | 0.011335 | 0.007760 | nan |
| -1 | 165.288307 | 18594.724267 | 11106.656189 | 9528.123506 | 4738.138691 | 4114.276322 | 0.017750 | 2118 | 0.597301 | 0.512410 | 0.504788 | 0.470383 | 1.165671 | 0.010968 | 0.010221 | 0.005233 |
| 0 | 372.435112 | 725254.590417 | 391601.740762 | 252990.290643 | 127178.757719 | 77571.464869 | 0.017750 | 152837 | 0.539951 | 0.348830 | 0.418757 | 0.327044 | 1.547892 | 0.001071 | 0.000837 | 0.000785 |
| 1 | 666.051083 | 6183060.248611 | 727663.546594 | 587697.762770 | 223957.351168 | 146085.385404 | 0.017750 | 323252 | 0.117687 | 0.095050 | 0.190318 | 0.153710 | 1.238159 | 0.000335 | 0.000270 | 0.002216 |
| 2 | 1096.386981 | -10327524.536444 | -2846449.667217 | -2354262.446732 | -298267.242895 | -332981.370410 | 0.017750 | 810184 | 0.275618 | 0.227960 | 0.169944 | 0.179561 | 1.209062 | 0.000189 | 0.000199 | 0.000591 |
| 3 | 1657.326482 | 6669539.589810 | 158865.453966 | 367390.431686 | 330295.323603 | 262911.213051 | 0.017750 | 1139272 | 0.023820 | 0.055085 | 0.222538 | 0.198544 | 0.432416 | 0.000208 | 0.000186 | 0.003446 |
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
df.filter(regex='wt').head(2)
| wt_mag_mono | wt_mag_chro | wt_mag | eT_mono_times_wt | eT_chro_times_wt | eX_monosq_times_wt | eX_chrosq_times_wt | eXsq_times_wt | eX_monosq_times_wt_std | eX_chrosq_times_wt_std | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2.594100 | -9.885015 | -3.645458 | -0.231243 | -0.231395 | -0.006740 | -0.003102 | -0.004921 | 276.190868 | 320.208299 |
| 1 | 0.702531 | 13.953099 | 7.327815 | 3.250454 | 2.643731 | 0.045375 | 0.055413 | 0.050394 | 276.190868 | 320.208299 |
# I need pandas > 0.25 for multiple agg
pd.__version__
'1.1.4'
def plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
show_mono_chro=False,
show_mono=False,
show_chro=False,
show_data=True,
):
# title
title=f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
# radial bin
df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)
# aggregations per given bin
df_radial_bins = df.groupby('bin').agg(
r_mean = ('r', 'mean'),
wt_mag_sum = ('wt_mag', 'sum'),
eT_mono_times_wt_sum = ('eT_mono_times_wt', 'sum'),
eT_chro_times_wt_sum = ('eT_chro_times_wt', 'sum'),
eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
err_numerator_mean = ('err_numerator', 'mean')
)
# bin counts
df_radial_bins['bin_count'] = df['bin'].value_counts()
df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """)
df_radial_bins = df_radial_bins.iloc[:-1,:]
# columns after binning
df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eX_mean_mono'] = np.sqrt(
df_radial_bins['eX_monosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum']
)
df_radial_bins['eX_mean_chro'] = np.sqrt(
df_radial_bins['eX_chrosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum']
)
df_radial_bins['eT_ratio'] = (df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro'])
# Errors
sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro']) /
sqrt_bin)
# remove first bin of strong lensing
df_radial_bins = df_radial_bins.iloc[1:,:]
# eT mono vs chro ratio
sc = go.Scatter( x=df_radial_bins['r_mean'],
y=df_radial_bins['eT_ratio'],
mode = 'lines+markers',
name = 'eT ratio',
error_y = dict(
type='data',
array=df_radial_bins['eT_ratio_err'],
visible=True,
)
)
mydata = [sc]
# layout
layout = go.Layout(
title={
'text': title,
'y':0.9,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'},
xaxis=dict(
title='radius',
titlefont=dict(
family='Courier New, monospace',
size=18,
color='#7f7f7f')),
yaxis=dict(title='eT',
titlefont=dict(
family='Courier New, monospace',
size=18,
color='#7f7f7f')))
myfig = go.Figure(data=mydata, layout=layout)
# also plot eT mono and chro
# monochromatic
sc1 = go.Scatter(x=df_radial_bins['r_mean'],
y=df_radial_bins['eT_mean_mono'],
mode = 'lines+markers',
name = 'eT mono',
error_y = dict(
type='data',
array=df_radial_bins['eT_mono_err'],
visible=True,
)
)
# chromatic
sc2 = go.Scatter(x=df_radial_bins['r_mean'],
y=df_radial_bins['eT_mean_chro'],
mode = 'lines+markers',
name = 'eT chro',
error_y = dict(
type='data',
array=df_radial_bins['eT_chro_err'],
visible=True,
)
)
if show_mono_chro:
mydata= [sc1,sc2]
myfig = go.Figure(data=mydata, layout=layout)
if show_mono:
mydata = [sc1]
myfig = go.Figure(data=mydata, layout=layout)
myfig.update_layout(
xaxis_title="Radius",
yaxis_title="eT_mono",
)
myfig.update_layout(
title={
'text': "eT_mono vs radius plot",
'y':0.9,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'}
)
if show_chro:
mydata = [sc2]
myfig = go.Figure(data=mydata, layout=layout)
myfig.update_layout(
xaxis_title="Radius",
yaxis_title="eT_chro",
)
myfig.update_layout(
title={
'text': "eT_chro vs radius plot",
'y':0.9,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'})
# this is for all cases
myfig.update_layout(showlegend=True)
# also show the dataframe
if show_data:
display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
# iplot plots in jupyter-notebook, plot opens in new tab.
return iplot(myfig, filename='myplot.html')
# plot
plotly_eT_plot(rmin=100, dlnr=0.2, min_bin_count=20,
show_mono_chro=False,
show_mono=False,
show_chro=False,
show_data=True,
)
/Users/poudel/opt/miniconda3/envs/dataSc/lib/python3.7/site-packages/pandas/core/series.py:726: RuntimeWarning: invalid value encountered in sqrt
| r_mean | wt_mag_sum | eT_mono_times_wt_sum | eT_chro_times_wt_sum | eX_monosq_times_wt_sum | eX_chrosq_times_wt_sum | err_numerator_mean | bin_count | eT_mean_mono | eT_mean_chro | eX_mean_mono | eX_mean_chro | eT_ratio | eT_mono_err | eT_chro_err | eT_ratio_err | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| bin | ||||||||||||||||
| -8 | 18.420042 | 394.116744 | -44.329227 | -56.171026 | 42.247473 | 29.241921 | 0.017750 | 41 | -0.112477 | -0.142524 | 0.327407 | 0.272390 | 0.789183 | 0.051132 | 0.042540 | 0.164336 |
| -7 | 22.666322 | -477.216896 | 551.034198 | 368.761914 | 29.859592 | 30.203667 | 0.017750 | 72 | -1.154683 | -0.772734 | nan | nan | 1.494282 | nan | nan | 0.016622 |
| -6 | 27.392622 | 516.517239 | -12.063079 | -21.281920 | 129.209842 | 104.078344 | 0.017750 | 99 | -0.023355 | -0.041203 | 0.500156 | 0.448888 | 0.566823 | 0.050268 | 0.045115 | 0.431651 |
| -5 | 33.838268 | -1040.235981 | 1431.875164 | 1011.663831 | -263.390153 | 10.051915 | 0.017750 | 209 | -1.376491 | -0.972533 | 0.503192 | nan | 1.415367 | 0.034807 | nan | 0.007965 |
| -4 | 41.218339 | 2421.271836 | 99.787293 | 65.238169 | 253.021747 | 258.523852 | 0.017750 | 418 | 0.041213 | 0.026944 | 0.323264 | 0.326760 | 1.529585 | 0.015811 | 0.015982 | 0.195554 |
| -3 | 50.259898 | 9157.846261 | -127.263242 | 40.044044 | 660.832182 | 558.325607 | 0.017750 | 770 | -0.013897 | 0.004373 | 0.268627 | 0.246915 | -3.178082 | 0.009681 | 0.008898 | nan |
| -2 | 60.542867 | -623367.605594 | -331441.359274 | -328096.250400 | -1956.499063 | -4029.885805 | 0.017750 | 906 | 0.531695 | 0.526329 | 0.056023 | 0.080403 | 1.010196 | 0.001861 | 0.002671 | 0.008367 |
| -1 | 72.890007 | -2839.261881 | 23.808578 | 936.486582 | -56.438429 | 203.698690 | 0.017750 | 579 | -0.008385 | -0.329835 | 0.140989 | nan | 0.025423 | 0.005859 | nan | 0.105281 |
| 0 | 89.174069 | -5707.330033 | -2951.907469 | -1810.312361 | -776.320833 | -721.382859 | 0.017750 | 130 | 0.517213 | 0.317191 | 0.368811 | 0.355522 | 1.630607 | 0.032347 | 0.031181 | 0.028849 |
| 1 | 139.673587 | -2294.077973 | -1487.983131 | -1203.345279 | -381.860188 | -171.058535 | 0.017750 | 273 | 0.648619 | 0.524544 | 0.407989 | 0.273066 | 1.236539 | 0.024693 | 0.016527 | 0.013824 |
| 2 | 169.294762 | 23311.500427 | 14439.085646 | 12499.595206 | 5461.927989 | 4587.761594 | 0.017750 | 1856 | 0.619398 | 0.536199 | 0.484047 | 0.443624 | 1.155164 | 0.011236 | 0.010297 | 0.005366 |
| 3 | 204.898275 | 64652.514138 | 32109.294568 | 28570.131882 | 5991.343633 | 5377.688435 | 0.017750 | 6688 | 0.496644 | 0.441903 | 0.304417 | 0.288407 | 1.123876 | 0.003722 | 0.003527 | 0.003477 |
| 4 | 248.735242 | 59329.130388 | 24312.506215 | 23754.742805 | 10066.178354 | 17295.709867 | 0.017750 | 14831 | 0.409790 | 0.400389 | 0.411906 | 0.539927 | 1.023480 | 0.003382 | 0.004434 | 0.002701 |
| 5 | 303.757330 | 421417.108450 | 208609.589375 | 173228.823844 | 98968.002061 | 59442.776270 | 0.017750 | 27068 | 0.495019 | 0.411063 | 0.484609 | 0.375572 | 1.204243 | 0.002946 | 0.002283 | 0.001795 |
| 6 | 369.081912 | -191325.802449 | -100595.293513 | -85692.548843 | -32475.060250 | -22603.872090 | 0.017750 | 42300 | 0.525780 | 0.447888 | 0.411991 | 0.343720 | 1.173909 | 0.002003 | 0.001671 | 0.001335 |
| 7 | 452.859022 | 374362.266999 | 226917.317012 | 112746.471177 | 44373.546180 | 17809.110033 | 0.017750 | 62465 | 0.606144 | 0.301169 | 0.344283 | 0.218110 | 2.012633 | 0.001378 | 0.000873 | 0.001248 |
| 8 | 552.373026 | 964732.679832 | -102250.284746 | -114351.048808 | 32017.682016 | -23972.167567 | 0.017750 | 97221 | -0.105988 | -0.118531 | 0.182176 | nan | 0.894179 | 0.000584 | nan | 0.003812 |
| 9 | 673.377725 | 1142244.159462 | 132624.193192 | 111767.200684 | 16020.719988 | 29408.484933 | 0.017750 | 134746 | 0.116108 | 0.097849 | 0.118430 | 0.160456 | 1.186611 | 0.000323 | 0.000437 | 0.003405 |
| 10 | 822.793139 | 4910077.273219 | 837432.980964 | 683169.409450 | 215256.403566 | 181595.029135 | 0.017750 | 202193 | 0.170554 | 0.139136 | 0.209379 | 0.192313 | 1.225806 | 0.000466 | 0.000428 | 0.001923 |
| 11 | 1004.215462 | 286053.341526 | -481970.278616 | -107660.292121 | -63657.189168 | 86839.825981 | 0.017750 | 291118 | -1.684897 | -0.376364 | nan | 0.550980 | 4.476769 | nan | 0.001021 | 0.000310 |
| 12 | 1228.004423 | -11484728.309429 | -2499124.522634 | -2338735.066754 | -272603.493162 | -459798.493354 | 0.017750 | 411027 | 0.217604 | 0.203639 | 0.154065 | 0.200089 | 1.068580 | 0.000240 | 0.000312 | 0.000987 |
| 13 | 1500.274031 | 3044173.576675 | 248467.326098 | 274119.749095 | 192478.210741 | 151372.116847 | 0.017750 | 604952 | 0.081621 | 0.090047 | 0.251453 | 0.222991 | 0.906419 | 0.000323 | 0.000287 | 0.001998 |
| 14 | 1788.614494 | 2783913.430631 | -87968.861175 | 99006.817738 | 112402.369038 | 85446.236506 | 0.017750 | 445043 | -0.031599 | 0.035564 | 0.200937 | 0.175194 | -0.888513 | 0.000301 | 0.000263 | nan |
plotly_eT_plot(rmin=100, dlnr=0.25,min_bin_count=20,show_mono_chro=True)
| r_mean | wt_mag_sum | eT_mono_times_wt_sum | eT_chro_times_wt_sum | eX_monosq_times_wt_sum | eX_chrosq_times_wt_sum | err_numerator_mean | bin_count | eT_mean_mono | eT_mean_chro | eX_mean_mono | eX_mean_chro | eT_ratio | eT_mono_err | eT_chro_err | eT_ratio_err | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| bin | ||||||||||||||||
| -7 | 15.662213 | 173.979233 | 42.718448 | 45.813532 | 12.077159 | 6.366144 | 0.017750 | 31 | 0.245538 | 0.263328 | 0.263472 | 0.191289 | 0.932442 | 0.047321 | 0.034356 | 0.094105 |
| -6 | 19.944983 | 766.204372 | -230.228617 | -205.073111 | 55.916866 | 39.176424 | 0.017750 | 64 | -0.300479 | -0.267648 | 0.270146 | 0.226121 | 1.122666 | 0.033768 | 0.028265 | 0.058725 |
| -5 | 25.429208 | -798.402503 | 777.045486 | 543.318673 | 41.681862 | 38.581154 | 0.017750 | 112 | -0.973250 | -0.680507 | nan | nan | 1.430184 | nan | nan | 0.015469 |
| -4 | 33.309525 | -663.840841 | 1390.977442 | 972.103169 | -168.108581 | 90.406131 | 0.017750 | 238 | -2.095348 | -1.464362 | 0.503226 | nan | 1.430895 | 0.032619 | nan | 0.004930 |
| -3 | 42.519721 | 4657.141525 | -121.559294 | -101.541198 | 541.450198 | 438.847120 | 0.017750 | 570 | -0.026102 | -0.021803 | 0.340973 | 0.306971 | 1.197143 | 0.014282 | 0.012858 | 0.233920 |
| -2 | 54.048938 | -616930.172788 | -330792.327802 | -326642.270092 | -1586.829508 | -3445.674937 | 0.017750 | 1091 | 0.536191 | 0.529464 | 0.050716 | 0.074734 | 1.012705 | 0.001535 | 0.002263 | 0.007570 |
| -1 | 67.998835 | -1856.707864 | -233.019138 | -64.159122 | -103.923753 | -59.228618 | 0.017750 | 930 | 0.125501 | 0.034555 | 0.236584 | 0.178605 | 3.631894 | 0.007758 | 0.005857 | 0.066340 |
| 0 | 89.184853 | -6248.255016 | -3245.669236 | -2051.006541 | -665.887542 | -614.867632 | 0.017750 | 233 | 0.519452 | 0.328253 | 0.326453 | 0.313698 | 1.582476 | 0.021387 | 0.020551 | 0.021137 |
| 1 | 152.503244 | 7076.857298 | 3657.309393 | 3393.444115 | 2639.220486 | 2337.857854 | 0.017750 | 824 | 0.516799 | 0.479513 | 0.610686 | 0.574763 | 1.077757 | 0.021274 | 0.020023 | 0.009323 |
| 2 | 193.083782 | 7191.964315 | 1815.201293 | 1581.401016 | -263.066478 | 502.871330 | 0.017750 | 5687 | 0.252393 | 0.219884 | nan | 0.264426 | 1.147844 | nan | 0.003506 | 0.007499 |
| 3 | 244.526665 | 130773.160099 | 63896.033968 | 58640.462471 | 18701.221987 | 24199.575728 | 0.017750 | 17116 | 0.488602 | 0.448414 | 0.378160 | 0.430174 | 1.089624 | 0.002891 | 0.003288 | 0.002176 |
| 4 | 313.244629 | 522911.286649 | 235337.728099 | 195677.921681 | 105664.857418 | 64156.221358 | 0.017750 | 36480 | 0.450053 | 0.374209 | 0.449522 | 0.350272 | 1.202679 | 0.002354 | 0.001834 | 0.001700 |
| 5 | 399.771527 | 69180.657736 | 73622.854484 | -21280.226801 | 545.121086 | -14817.271062 | 0.017750 | 59920 | 1.064212 | -0.307604 | 0.088768 | nan | -3.459684 | 0.000363 | nan | nan |
| 6 | 514.891284 | 666539.930035 | -98444.274831 | -106842.638606 | 18735.496197 | -25334.199797 | 0.017750 | 104266 | -0.147694 | -0.160294 | 0.167656 | nan | 0.921395 | 0.000519 | nan | 0.002682 |
| 7 | 658.919609 | 1452798.537874 | 154789.213569 | 130143.841780 | 33959.415294 | 36079.581080 | 0.017750 | 163134 | 0.106546 | 0.089581 | 0.152889 | 0.157590 | 1.189370 | 0.000379 | 0.000390 | 0.003376 |
| 8 | 847.787531 | 6039507.887663 | 898164.214628 | 739101.235777 | 246438.908139 | 198397.922631 | 0.017750 | 266846 | 0.148715 | 0.122378 | 0.202001 | 0.181246 | 1.215211 | 0.000391 | 0.000351 | 0.001912 |
| 9 | 1087.538723 | -10794072.993734 | -2731279.982617 | -1969730.128536 | -332252.943674 | -350139.906584 | 0.017750 | 412959 | 0.253035 | 0.182483 | 0.175445 | 0.180106 | 1.386626 | 0.000273 | 0.000280 | 0.000965 |
| 10 | 1398.215786 | 246236.526010 | -189164.688080 | -341358.174720 | 84554.304063 | 75114.912885 | 0.017750 | 655651 | -0.768224 | -1.386302 | 0.585992 | 0.552315 | 0.554153 | 0.000724 | 0.000682 | 0.000159 |
| 11 | 1736.978635 | 4047817.892683 | 39117.100706 | 181887.684887 | 185136.032487 | 122081.786184 | 0.017750 | 618877 | 0.009664 | 0.044935 | 0.213863 | 0.173666 | 0.215062 | 0.000272 | 0.000221 | 0.008127 |
plotly_eT_plot(rmin=100, dlnr=0.25,min_bin_count=20,show_data=False,show_mono=True)
plotly_eT_plot(rmin=100, dlnr=0.25,min_bin_count=20,show_data=False,show_chro=True)
which pip
pip install ipywidgets
jupyter nbextension enable --py --sys-prefix widgetsnbextension
import ipywidgets as widgets
from ipywidgets import HBox, VBox
from ipywidgets import interactive
rmin_ = widgets.IntSlider(min=0, max=500, step=50, value=100)
dlnr_ = widgets.FloatSlider(min=0.1,max=0.8,step=0.05,value=0.25)
min_bin_count_ = widgets.IntSlider(min=10, max=600, step=20, value=20)
interactive_plot = interactive(plotly_eT_plot,
rmin=rmin_,
dlnr=dlnr_,
min_bin_count=min_bin_count_)
output = interactive_plot.children[-1]
interactive_plot
"""
NOTE:
The shape of curve eT vs radius depends heavily on the input parameters
For example:
rmin=100, dlnr=0.25,min_bin_count=20 gives on curve
rmin=200, dlnr=0.35,min_bin_count=20 gives another curve
""";
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
'{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 30 min 56 secs
import subprocess
subprocess.call(['python', '-m', 'nbconvert', '*.ipynb'])
1
!mkdir -p html
!mv *.html html/
mv: rename *.html to html/*.html: No such file or directory
!python get_nbviewer_links.py
python: can't open file 'get_nbviewer_links.py': [Errno 2] No such file or directory